package com.zc.bigdata.utils;

import org.apache.commons.lang.StringUtils;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for extracting fields from raw access-log lines.
 *
 * <p>A log line is expected to be a sequence of fields separated by the
 * SOH control character (0x01). {@link #parseLog(String)} picks the url,
 * session id, ip and time fields by position and enriches them with the
 * region information resolved from the ip; {@link #getPageId(String)}
 * extracts the numeric {@code topicId} query value from a url.
 *
 * @program: hadoop-dfs
 * @description: LogParser
 * @author: zengchen
 * @create: 2020-06-03 09:49
 **/
public class LogParser {

    /** Separator between the fields of one raw log line (octal escape for char 0x01). */
    private static final String FIELD_SEPARATOR = "\001";

    /** Zero-based positions of the fields read from a split log line. */
    private static final int URL_INDEX = 1;
    private static final int SESSION_ID_INDEX = 10;
    private static final int IP_INDEX = 13;
    private static final int TIME_INDEX = 17;

    /** Smallest number of fields a line must contain for every index above to be valid. */
    private static final int MIN_FIELD_COUNT = TIME_INDEX + 1;

    /** Compiled once; the capture group holds the digits that follow {@code topicId=}. */
    private static final Pattern TOPIC_ID_PATTERN = Pattern.compile("topicId=([0-9]+)");

    private static final IPParser IP_PARSER = IPParser.getInstance();

    /**
     * Parses one raw log line into a map of named fields.
     *
     * <p>On success the map contains the keys {@code ip}, {@code url},
     * {@code sessionId} and {@code time}, plus {@code country},
     * {@code province} and {@code city} taken from the region resolved for
     * the ip.
     *
     * @param logLine the raw log line; may be {@code null} or blank
     * @return a new mutable map with the parsed fields; empty when the line is
     *         {@code null}, blank, or has fewer fields than required
     */
    public static Map<String, String> parseLog(String logLine){
        Map<String, String> resultMap = new HashMap<>();
        if(StringUtils.isBlank(logLine)){
            return resultMap;
        }

        String[] splits = logLine.split(FIELD_SEPARATOR);
        // A truncated or malformed line would otherwise fail with
        // ArrayIndexOutOfBoundsException; treat it like a blank line instead.
        if(splits.length < MIN_FIELD_COUNT){
            return resultMap;
        }

        String ip = splits[IP_INDEX];

        resultMap.put("ip", ip);
        resultMap.put("url", splits[URL_INDEX]);
        resultMap.put("sessionId", splits[SESSION_ID_INDEX]);
        resultMap.put("time", splits[TIME_INDEX]);

        IPParser.RegionInfo regionInfo = IP_PARSER.analyseIp(ip);
        // NOTE(review): analyseIp's contract is not visible from this file; the
        // null guard is defensive so an unresolvable ip does not abort parsing.
        if(regionInfo != null){
            resultMap.put("country", regionInfo.getCountry());
            resultMap.put("province", regionInfo.getProvince());
            resultMap.put("city", regionInfo.getCity());
        }
        return resultMap;
    }

    /**
     * Extracts the page id from a url, i.e. the digits of the first
     * {@code topicId=<digits>} occurrence.
     *
     * @param url the url to inspect; may be {@code null} or blank
     * @return the digits following {@code topicId=}, or {@code null} when the
     *         url is {@code null}, blank, or contains no such parameter
     */
    public static String getPageId(String url){
        if(StringUtils.isBlank(url)){
            return null;
        }
        Matcher matcher = TOPIC_ID_PATTERN.matcher(url);
        return matcher.find() ? matcher.group(1) : null;
    }
}
